
fn_identifying_stocks <- function(df_input, group, dep_var, indep_var_long, indep_var_short) {
  
  if(nrow(df_input) <= 1){
    
    output <- list(group, 0, NA,
                   NA, NA, NA, NA, NA, NA,
                   NA, NA, NA, NA)
    return(output)
  }
  
  f_long  <- paste(dep_var, paste(indep_var_long,  collapse = " + "), sep = " ~ ")
  f_short <- paste(dep_var, paste(indep_var_short, collapse = " + "), sep = " ~ ")
  
  # Running regressions
  f_long  <- as.formula(f_long)
  f_short <- as.formula(f_short)
  
  reg_long  <- lm(f_long,  data = df_input)
  reg_short <- lm(f_short, data = df_input)
  
  # Outputs
  
  lev_long  <- hat(model.matrix(reg_long))  # Leverage of each observation
  lev_short <- hat(model.matrix(reg_short)) # Leverage of each observation
  
  R2_long       <- summary(reg_long)$r.squared
  R2_short      <- summary(reg_short)$r.squared
  beta_0        <- summary(reg_long)$coefficients[2] # should be [1] without constant
  beta_1        <- summary(reg_long)$coefficients[3] # should be [2] without constant
  zeta_0        <- summary(reg_short)$coefficients[2]
  var_eps       <- summary(reg_long)$sigma^2
  outlier_long  <- max(lev_long)  # Greatest outlier
  outlier_short <- max(lev_short) # Greatest outlier
  tau_pi        <- beta_1^2/var_eps
  tau_pi_R      <- (R2_long - R2_short)/(1 - R2_short)
  
  n_obs  <- df_input %>% nrow() %>% as.numeric()
  
  n_factors <- length(reg_long$coefficients) - length(reg_long$model) + 2
  
  output <- list(group, n_obs, n_factors,
                 R2_long, R2_short, beta_0, beta_1, zeta_0, var_eps,
                 outlier_long, outlier_short, tau_pi, tau_pi_R)
  
  return(output)
}

fn_identifying_stocks_PS <- function(df_input, dep_var, indep_var_long, indep_var_short) {
  
  df <- df_input
  
  f_long  <- paste(dep_var, paste(indep_var_long,  collapse = " + "), sep = " ~ ")
  f_short <- paste(dep_var, paste(indep_var_short, collapse = " + "), sep = " ~ ")
  
  # Running regressions
  f_long  <- as.formula(f_long)
  f_short <- as.formula(f_short)
  
  reg_long  <- lm(f_long,  data = df)
  reg_short <- lm(f_short, data = df)
  
  # Outputs
  df$epsilon_long <- reg_long$residuals
  df$epsilon_short <- reg_short$residuals
  
  return(list(recovered=df, reg_long_PS=reg_long,reg_short_PS=reg_short))
  
}